Benchmarking RDF against MDAnalysis

The algorithms in freud are highly efficient and rely on parallelized C++ code. Below, we show a benchmark of freud.density.RDF against MDAnalysis.analysis.rdf. This benchmark was run on an Intel(R) Core(TM) i3-8100B CPU @ 3.60GHz.

[1]:
import freud
import gsd
import MDAnalysis
import MDAnalysis.analysis.rdf
import multiprocessing as mp
import numpy as np
import matplotlib.pyplot as plt
import timeit
from tqdm import tqdm
[2]:
trajectory_filename = 'data/rdf_benchmark.gsd'
rmax = 5
rmin = 0.1
nbins = 75
[3]:
trajectory = MDAnalysis.coordinates.GSD.GSDReader(trajectory_filename)
frame = trajectory[0]
topology = MDAnalysis.core.topology.Topology(n_atoms=frame.n_atoms)
u = MDAnalysis.as_Universe(topology, trajectory_filename)

rdf = MDAnalysis.analysis.rdf.InterRDF(g1=u.atoms, g2=u.atoms,
                                       nbins=nbins,
                                       range=(rmin, rmax)).run()
[4]:
plt.plot(rdf.bins, rdf.rdf)
plt.show()
../../_images/examples_examples_Benchmarking_RDF_against_MDAnalysis_4_0.png
[5]:
with gsd.hoomd.open(trajectory_filename, 'rb') as traj:
    freud_rdf = freud.density.RDF(rmax=rmax, dr=(rmax-rmin)/nbins, rmin=rmin)
    for frame in traj:
        freud_rdf.accumulate(frame.configuration.box, frame.particles.position)
[6]:
plt.plot(freud_rdf.R, freud_rdf.RDF)
plt.show()
../../_images/examples_examples_Benchmarking_RDF_against_MDAnalysis_6_0.png

Timing Functions

[7]:
def time_statement(stmt, repeat=3, number=1, **kwargs):
    timer = timeit.Timer(stmt=stmt, globals=kwargs)
    times = timer.repeat(repeat, number)
    return np.mean(times), np.std(times)
[8]:
def time_freud_rdf(trajectory_filename, rmax, rmin, nbins):
    code = """
rdf = freud.density.RDF(rmax=rmax, dr=(rmax-rmin)/nbins, rmin=rmin)
for frame in trajectory:
    rdf.accumulate(frame.configuration.box, frame.particles.position)"""
    with gsd.hoomd.open(trajectory_filename, 'rb') as trajectory:
        return time_statement(code, freud=freud, trajectory=trajectory, rmax=rmax, rmin=rmin, nbins=nbins)
[9]:
def time_mdanalysis_rdf(trajectory_filename, rmax, rmin, nbins):
    trajectory = MDAnalysis.coordinates.GSD.GSDReader(trajectory_filename)
    frame = trajectory[0]
    topology = MDAnalysis.core.topology.Topology(n_atoms=frame.n_atoms)
    u = MDAnalysis.as_Universe(topology, trajectory_filename)
    code = """rdf = MDAnalysis.analysis.rdf.InterRDF(g1=u.atoms, g2=u.atoms, nbins=nbins, range=(rmin, rmax)).run()"""
    return time_statement(code, MDAnalysis=MDAnalysis, u=u, rmax=rmax, rmin=rmin, nbins=nbins)
[10]:
# Test timing functions
params = dict(
    trajectory_filename=trajectory_filename,
    rmax=rmax,
    rmin=rmin,
    nbins=nbins)

def system_size(trajectory_filename, **kwargs):
    with gsd.hoomd.open(params['trajectory_filename'], 'rb') as trajectory:
        return {'frames': len(trajectory),
                'particles': len(trajectory[0].particles.position)}

print(system_size(**params))
freud_rdf_runtime = time_freud_rdf(**params)
print('freud:', freud_rdf_runtime)
mdanalysis_rdf_runtime = time_mdanalysis_rdf(**params)
print('MDAnalysis:', mdanalysis_rdf_runtime)
{'frames': 5, 'particles': 15625}
freud: (2.8377244066666663, 0.016533837168724797)
MDAnalysis: (18.35462979233333, 0.058068162708303186)

Perform Measurements

[11]:
def measure_runtime_scaling_rmax(rmaxes, **params):
    result_times = []
    for rmax in tqdm(rmaxes):
        params.update(dict(rmax=rmax))
        freud.parallel.setNumThreads(1)
        freud_single = time_freud_rdf(**params)
        freud.parallel.setNumThreads(0)
        result_times.append((freud_single, time_freud_rdf(**params), time_mdanalysis_rdf(**params)))
    return np.asarray(result_times)
[12]:
def plot_result_times(result_times, rmaxs, frames, particles):
    plt.figure(figsize=(6, 4), dpi=200)
    plt.errorbar(rmaxs, result_times[:, 0, 0], result_times[:, 0, 1], label="freud v{} density.RDF P=1".format(freud.__version__))
    plt.errorbar(rmaxs, result_times[:, 1, 0], result_times[:, 1, 1], label="freud v{} density.RDF P={}".format(freud.__version__, mp.cpu_count()))
    plt.errorbar(rmaxs, result_times[:, 2, 0], result_times[:, 2, 1], label="MDAnalysis v{} analysis.rdf.InterRDF".format(MDAnalysis.__version__))
    plt.title(r'RDF for {} frames, {} particles'.format(frames, particles))
    plt.xlabel(r'RDF $r_{{max}}$')
    plt.ylabel(r'Average Runtime (s)')
    plt.yscale('log')
    plt.legend()
    plt.show()
[13]:
rmaxes = [0.2, 0.3, 0.5, 1, 2, 3]
[14]:
result_times = measure_runtime_scaling_rmax(rmaxes, **params)
plot_result_times(result_times, rmaxes, **system_size(params['trajectory_filename']))
100%|██████████| 6/6 [00:28<00:00,  7.19s/it]
../../_images/examples_examples_Benchmarking_RDF_against_MDAnalysis_16_1.png
[15]:
print('Speedup, parallel freud / serial freud: {:.3f}x'.format(np.average(result_times[:, 0, 0] / result_times[:, 1, 0])))
print('Speedup, parallel freud / MDAnalysis: {:.3f}x'.format(np.average(result_times[:, 2, 0] / result_times[:, 1, 0])))
print('Speedup, serial freud / MDAnalysis: {:.3f}x'.format(np.average(result_times[:, 2, 0] / result_times[:, 0, 0])))
Speedup, parallel freud / serial freud: 2.575x
Speedup, parallel freud / MDAnalysis: 7.279x
Speedup, serial freud / MDAnalysis: 2.828x